Manajemen data infeksi dan vaksinasi Covid-19 seluruh negara.
Infected Case
Sumber: Center for Systems Science and Engineering (CSSE) at Johns Hopkins University (GitHub)
URL: https://github.com/CSSEGISandData/COVID-19
Terdiri dari tiga data set:
Population dan Kode Negara
Sumber: Center for Systems Science and Engineering (CSSE) at Johns Hopkins University (GitHub)
Histori Vaksin
Sumber: Our World in Data (Github)
Vaccine manufacture
Sumber: Our World in Data (Github)
Package
base
tidyverse (dplyr, readr, lubridate)
Function
read_csv, select, rename, arrange, group_by, mutate, summarise, left_join, pivot_wider, pivot_longer
library(tidyverse)
library(lubridate)
Sumber data:
Input:
Output:
#' Reshape a wide cumulative time-series CSV into per-country daily totals.
#'
#' The file is read with `read_csv()`, its headers are made syntactic with
#' `make.names()` (which prefixes names that start with a digit with "X"),
#' and every "X..." column is stacked into one row per date.
#'
#' @param url Path or URL handed to `read_csv()`. The file must contain a
#'   `Country/Region` column and one column per date whose header parses as
#'   month/day/year.
#' @return An ungrouped tibble with columns `country`, `last_update` (Date)
#'   and `total` (sum over all rows that share a country and date).
infected <- function(url) {
  wide <- rename_with(read_csv(url), make.names)

  # Date headers now look like "X1.22.20"; stack them into long format.
  long <- pivot_longer(
    wide,
    cols = starts_with("X"),
    names_to = "last_update",
    values_to = "total"
  )

  long %>%
    # Strip the "X" prefix again before parsing the date.
    mutate(last_update = mdy(sub("X", "", last_update))) %>%
    rename(country = Country.Region) %>%
    # Collapse province/state rows into one row per country and day.
    group_by(country, last_update) %>%
    summarise(total = sum(total)) %>%
    ungroup()
}
# Locations of the three global time-series files (deaths, recovered,
# confirmed) in the CSSEGISandData/COVID-19 GitHub repository.
url_death <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
url_recovered <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
url_confirmed <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"

# Load the confirmed-cases file in its original wide layout and preview it.
confirmed <- url_confirmed %>%
  read_csv()
confirmed
# Join all together: one row per country and day with the cumulative
# confirmed, recovered and death counts plus the derived active count.
#
# Each series is given its final column name *before* joining, so the result
# does not depend on the ".x" / ".y" suffixes that left_join() generates for
# clashing column names.
infected_case <- infected(url_confirmed) %>%
  rename(total_confirmed = total) %>%
  left_join(
    infected(url_recovered) %>% rename(total_recovered = total),
    by = c("country", "last_update")
  ) %>%
  left_join(
    infected(url_death) %>% rename(total_death = total),
    by = c("country", "last_update")
  ) %>%
  # Missing counts are treated as 0. This must happen before total_active is
  # derived: otherwise a single missing component makes total_active NA,
  # which would then be overwritten with 0 instead of the real difference.
  mutate(across(starts_with("total"), ~ replace_na(., 0))) %>%
  mutate(total_active = total_confirmed - (total_recovered + total_death))
infected_case
Sumber data:
Input:
Output:
# Lookup table with identifier codes, coordinates and population.
url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv"
country <- read_csv(url)
country

# Keep only the rows whose UID equals code3, then reduce the table to the
# five attributes used later, under snake_case names.
# NOTE(review): this filter may keep more than one row per Country_Region if
# territories carry their own code3; confirm before joining on `country`.
country <- country %>%
  filter(code3 == UID) %>%
  rename(
    iso_code = iso3,
    country = Country_Region,
    latitude = Lat,
    longitude = Long_,
    population = Population
  ) %>%
  select(iso_code, country, latitude, longitude, population)
country
Sumber data:
Input:
total_vaccinations : total vaksinasi
people_vaccinated : total penduduk yang sudah divaksin
people_fully_vaccinated : total penduduk yang sudah divaksin lengkap
total_boosters : total vaksinasi booster
Output:
# Raw vaccination history as published; previewed before cleaning.
url <- "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv"
vaccination <- read_csv(url)
vaccination

# Clean-up of the cumulative columns (total_vaccinations ... total_boosters):
#   1. keep the columns from `location` through `total_boosters` and give
#      the key columns their final names;
#   2. turn zeros into NA so they count as gaps;
#   3. within each location, carry the last known value forward in date
#      order;
#   4. set whatever is still NA (nothing earlier to carry forward) to 0.
vaccination <- vaccination %>%
  select(location:total_boosters) %>%
  rename(country = location, last_update = date) %>%
  mutate(across(total_vaccinations:total_boosters, ~ na_if(.x, 0))) %>%
  arrange(country, iso_code, last_update) %>%
  group_by(country, iso_code) %>%
  fill(total_vaccinations:total_boosters, .direction = "down") %>%
  ungroup() %>%
  mutate(across(total_vaccinations:total_boosters, ~ replace_na(.x, 0)))
vaccination
Sumber data:
Input/Output:
# Input/Output note carried over from the surrounding text: `vaccines`.
# That text fragment used to be fused in front of the assignment below,
# which made the statement unparseable; it is kept here as a comment.
url <- "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/locations.csv"

# One table of per-location vaccine information, loaded as published and
# previewed.
vaccine_manufacture <- read_csv(url)
vaccine_manufacture
Input:
infected_case, country, vaccination
Output:
# Join keys used to build the daily table:
#   * infected_case with country: on the `country` column. Both tables come
#     from the same source (JHU), so country names are spelled identically.
#   * vaccination: on `iso_code`. The data come from a different source, so
#     joining on a standardized key/code is strongly recommended.
c19_daily <- infected_case %>%
  left_join(country, by = "country") %>%
  select(
    iso_code, country, population, last_update,
    starts_with("total")
  ) %>%
  # Drop the vaccination table's own country name before joining, so no
  # country.x / country.y pair is created that would need cleaning up.
  left_join(
    select(vaccination, -country),
    by = c("iso_code", "last_update")
  ) %>%
  group_by(iso_code) %>%
  arrange(iso_code, last_update) %>%
  # Days without a vaccination record inherit the latest earlier value of
  # the same iso_code ...
  fill(total_vaccinations:total_boosters, .direction = "down") %>%
  ungroup() %>%
  # ... and days with no earlier value at all are set to 0.
  mutate(across(total_vaccinations:total_boosters, ~ replace_na(., 0)))
c19_daily
# Preview: for every iso_code, the earliest date on which the cumulative
# confirmed, death and vaccination counts are above zero. The result is
# printed only, not stored.
c19_daily %>%
  pivot_longer(
    cols = c(total_confirmed, total_death, total_vaccinations),
    names_to = "metric",
    values_to = "value"
  ) %>%
  # Only days on which the counter is already positive qualify.
  filter(value > 0) %>%
  group_by(iso_code, metric) %>%
  summarise(first_date = min(last_update)) %>%
  ungroup() %>%
  # Back to one row per iso_code with one date column per metric.
  pivot_wider(names_from = metric, values_from = first_date) %>%
  rename(
    first_confirmed = total_confirmed,
    first_death = total_death,
    first_vaccination = total_vaccinations
  )
Input:
infected_case, country, vaccination, vaccine_manufacture
Output:
# First date per iso_code on which the cumulative confirmed, death and
# vaccination counts are above zero. Computed once as a named table instead
# of as a pipeline nested inside the join below.
first_event <- c19_daily %>%
  pivot_longer(
    c(total_confirmed, total_death, total_vaccinations),
    names_to = "grp",
    values_to = "total"
  ) %>%
  filter(total > 0) %>%
  group_by(iso_code, grp) %>%
  summarise(last_update = min(last_update), .groups = "drop") %>%
  pivot_wider(names_from = grp, values_from = last_update) %>%
  rename(
    first_confirmed = total_confirmed,
    first_death = total_death,
    first_vaccination = total_vaccinations
  )

# Latest snapshot: one row per (iso_code, country) taken from its most recent
# day, enriched with the first-event dates and the vaccine information.
c19_latest <- c19_daily %>%
  # get latest data
  group_by(iso_code, country) %>%
  filter(last_update == max(last_update)) %>%
  ungroup() %>%
  # get first_event; the key is spelled out instead of relying on an
  # implicit natural join over whatever columns happen to be shared
  left_join(first_event, by = "iso_code") %>%
  # vaccine manufacture
  left_join(vaccine_manufacture, by = "iso_code") %>%
  select(
    iso_code:last_update,
    first_confirmed, first_death,
    total_confirmed:total_active,
    first_vaccination,
    total_vaccinations:total_boosters,
    vaccines_manufacture = vaccines
  )
c19_latest